# -------------------------------------------------------------------
#       -*- coding: utf-8 -*-
#   @Project    :   spider_biqushen
#   @File       :   main.py
#   @Author     :   WANGYU
#   @Time       :   2021-09-01 11:18:36
#   @Software   :   PyCharm
#   @Desc       :   
# -------------------------------------------------------------------


import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


def get_html(novel_url, timeout=10):
    """Fetch a page with a browser-like User-Agent and decode it as UTF-8.

    Args:
        novel_url: Absolute URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang a long
            crawl on a single stalled connection.

    Returns:
        The ``requests.Response`` object, with ``encoding`` forced to
        ``'utf-8'`` so that ``response.text`` is decoded as UTF-8 regardless
        of what the server's headers declare.

    Raises:
        requests.RequestException: On connection errors or timeouts. HTTP
            error statuses are not raised; the response is returned as-is.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
                      ' (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'
    }
    response = requests.get(novel_url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    return response


def _next_page_url(page, current_url):
    """Return the absolute URL of the page to fetch next, or None if absent.

    The link is taken from the third ``<a>`` inside the ``div`` whose class
    attribute is ``"link xb"``. Returns None when that div, the anchor, or
    its ``href`` is missing, so the caller can stop instead of crashing.
    """
    nav = page.find('div', attrs={'class': 'link xb'})
    if nav is None:
        return None
    anchors = nav.find_all('a')
    if len(anchors) < 3:
        return None
    href = anchors[2].get('href')
    if not href:
        return None
    # urljoin resolves site-relative hrefs ("/book/...") against the current
    # page and leaves already-absolute hrefs untouched.
    return urljoin(current_url, href)


def get_book():
    """Download a novel page by page into ``./files/<title>.txt``.

    Starts at a hard-coded first page, writes the text of its ``BookText``
    div to the output file, then keeps following the third link of the
    ``link xb`` navigation div, appending each page's text. The book title
    (used as the file name) is the text of the third link inside the
    ``crumbs`` div of the first page.

    The crawl stops when a fetched page has no ``BookText`` div, when no
    next link can be found, or when the next link points to a page that was
    already downloaded. Progress (page text and next URL) is printed to
    stdout, and there is a 3-second pause between requests.

    Raises:
        ValueError: If the book title cannot be found on the first page.
        requests.RequestException: If a page cannot be fetched.
        OSError: If the output directory or file cannot be created.
    """
    # To download another book, change the book id in this URL
    # (for example /book/26585/0.html).
    url = 'https://www.biqushen.cc/book/20771/0.html'
    out_dir = './files/'

    page = BeautifulSoup(get_html(url).text, 'html.parser')

    crumbs = page.find('div', attrs={'class': 'crumbs'})
    crumb_links = crumbs.find_all('a') if crumbs is not None else []
    if len(crumb_links) < 3:
        raise ValueError('cannot find the book title on ' + url)
    title = crumb_links[2].text

    os.makedirs(out_dir, exist_ok=True)
    visited = set()

    # One UTF-8 handle for the whole book keeps every page in the same
    # encoding, independent of the platform default.
    with open(out_dir + title + '.txt', 'w', encoding='utf-8') as novel:
        while True:
            book_text = page.find('div', attrs={'id': 'BookText'})
            if book_text is None:
                # Not a content page (e.g. the link led out of the book).
                break
            print(book_text.text)
            novel.write(book_text.text)
            # Flush so an interrupted run still leaves finished pages on disk.
            novel.flush()

            visited.add(url)
            url = _next_page_url(page, url)
            if url is None or url in visited:
                break
            print(url)

            # Be polite to the server between consecutive requests.
            time.sleep(3)
            # get_html forces UTF-8 decoding, matching the first page.
            page = BeautifulSoup(get_html(url).text, 'html.parser')


# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    get_book()
